Timothy Tickle and Brian Haas
January 2017
Please follow along using the cut_and_paste.txt that comes with this repo.
If code is cut off here in the presentation, it will be available in full in the cut_and_paste.txt document.
# Load libraries
library(dplyr) # Dataframe manipulation
library(MAST) # For DE
library(Matrix) # Sparse matrices
library(useful) # Corner function
library(vioplot) # Violin pots
library(Seurat) # Single cell Analysis
source("./src/dotplot.R")
source("./src/makeNiceMAST.R")
load("./data/bipolar_raw.Robj")
# 24904 x 44994 (1 minute)
bipolar.data <- Matrix(as.matrix(bipolar_dge), sparse=TRUE)
# Memory use as a normal matrix
object.size(bipolar_dge)
8971430520 bytes
# Memory use as a sparse matrix
# ~23 X less
object.size(bipolar.data)
393096408 bytes
bipolar_dge <- NULL
# Expected raw counts (non-normalized data)
# Can give log transformed data but do not transform in setup method
bipolar.seurat.raw <- new("seurat", raw.data=bipolar.data)
# Display the internal pieces of the Seurat Object
slotNames(bipolar.seurat.raw)
[1] "raw.data" "data" "scale.data"
[4] "var.genes" "is.expr" "ident"
[7] "pca.x" "pca.rot" "emp.pval"
[10] "kmeans.obj" "pca.obj" "gene.scores"
[13] "drop.coefs" "wt.matrix" "drop.wt.matrix"
[16] "trusted.genes" "drop.expr" "data.info"
[19] "project.name" "kmeans.gene" "kmeans.cell"
[22] "jackStraw.empP" "jackStraw.fakePC" "jackStraw.empP.full"
[25] "pca.x.full" "kmeans.col" "mean.var"
[28] "imputed" "mix.probs" "mix.param"
[31] "final.prob" "insitu.matrix" "tsne.rot"
[34] "ica.rot" "ica.x" "ica.obj"
[37] "cell.names" "cluster.tree" "snn.sparse"
[40] "snn.dense" "snn.k"
head(bipolar.seurat.raw@raw.data)
6 x 44994 sparse Matrix of class "dgCMatrix"
0610005C13Rik . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
0610007P14Rik . . . . 2 . . . 1 . . . . . . . . . . . . . . . . . . . . .
0610009B22Rik . 1 . . 1 . . . . . . 1 . . 1 . 1 2 . . . . . . . 1 . . . .
0610009E02Rik . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
0610009L18Rik . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
0610009O20Rik 2 . . . . . . . . . . . . . . . . . 1 . . . . 1 . . . . . .
0610005C13Rik . ......
0610007P14Rik . ......
0610009B22Rik . ......
0610009E02Rik . ......
0610009L18Rik . ......
0610009O20Rik . ......
.....suppressing columns in show(); maybe adjust 'options(max.print= *, width = *)'
..............................
head(bipolar.seurat.raw@data)
NULL
head(bipolar.seurat.raw@ident)
logical(0)
head(bipolar.seurat.raw@var.genes)
logical(0)
?seurat
# Gene names (row names)
head(row.names(bipolar.seurat.raw@raw.data))
[1] "0610005C13Rik" "0610007P14Rik" "0610009B22Rik" "0610009E02Rik"
[5] "0610009L18Rik" "0610009O20Rik"
length(row.names(bipolar.seurat.raw@raw.data))
[1] 24904
# Column names # 24904 x 44994
# Sample / Cell names / Barcodes
head(colnames(bipolar.seurat.raw@raw.data),3)
[1] "Bipolar1_CCCACAAGACTA" "Bipolar1_TCGCCTCGTAAG" "Bipolar1_CAAAGCATTTGC"
length(colnames(bipolar.seurat.raw@raw.data))
[1] 44994
dim(bipolar.seurat.raw@raw.data)
[1] 24904 44994
# Only the corner
# The full data will be too large to see
corner(as.matrix(bipolar.seurat.raw@raw.data))
Bipolar1_CCCACAAGACTA Bipolar1_TCGCCTCGTAAG
0610005C13Rik 0 0
0610007P14Rik 0 0
0610009B22Rik 0 1
0610009E02Rik 0 0
0610009L18Rik 0 0
Bipolar1_CAAAGCATTTGC Bipolar1_CTTTTGATTGAC
0610005C13Rik 0 0
0610007P14Rik 0 0
0610009B22Rik 0 0
0610009E02Rik 0 0
0610009L18Rik 0 0
Bipolar1_GCTCCAATGACA
0610005C13Rik 0
0610007P14Rik 2
0610009B22Rik 1
0610009E02Rik 0
0610009L18Rik 0
# Plot genes per cell
# How many genes expressed per cells
# 3 Minutes
complexity.per.cell <- apply(bipolar.seurat.raw@raw.data,
2, function(x) sum(x>0))
# Mean count per cell.
# 3 minutes
mean.count.per.cell <- apply(bipolar.seurat.raw@raw.data,
2, function(x) mean(x))
# Gene prevalence
# 5 minutes
gene.prevalence <- apply(bipolar.seurat.raw@raw.data,
1, function(x) sum(x>0))
# Get batches based on cell names
technical_batches <- sapply(colnames(bipolar.seurat.raw@raw.data),
FUN=function(x){strsplit(x,"_",fixed=TRUE)[[1]][1]})
# Turn to numbers and add cell names to them
technical_batches <- as.numeric(as.factor(technical_batches))
names(technical_batches) <- colnames(bipolar.seurat.raw@raw.data)
# Plot genes per cell
# How many genes expressed per cell
# Violin plot
vioplot(complexity.per.cell)
# Add points
stripchart(complexity.per.cell, add=TRUE, vertical=TRUE, method="jitter", jitter=0.3, pch=20, col="#00000033")
# Add lines for reference
abline(h=500, col="orange")
abline(h=3000, col="green")
# Add text
title("Study Complexity")
axis(side=1,at=1,labels=c("Study"))
# Plot cell complexity, sorted by complexity
plot(1:length(complexity.per.cell),
sort(complexity.per.cell),
xlab="Cells ranked by complexity (Less to More)",
ylab="Complexity")
# Add lines
abline(h=500, col="orange")
abline(h=3000, col="green")
title("Complexity from Less to More")
# Plot genes per cell
# Complexity per technical batch
plot(complexity.per.cell ~ jitter(technical_batches,2))
# Add line
abline(h=500, col="orange")
abline(h=3000, col="green")
# Add text
title("Study Complexity")
axis(side=1,at=1,labels=c("Study"))
# Complexity by mean expression
plot(complexity.per.cell, mean.count.per.cell)
# Add lines
abline(v=500, col="orange")
abline(v=3000, col="green")
# How often genes are seen in cells
hist(log2(gene.prevalence), breaks=100)
# add line
abline(v=log(30), col="orange")
# From 24904 x 44994 to 14575 x 30673
# 5 minutes
bipolar.seurat <- Setup(bipolar.seurat.raw,
min.cells=30, min.genes=500,
do.logNormalize=TRUE,
total.expr=1e4,
project="Tutorial")
[1] "Performing log-normalization"
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 10%
|
|======== | 13%
|
|========== | 16%
|
|============= | 19%
|
|=============== | 23%
|
|================= | 26%
|
|=================== | 29%
|
|===================== | 32%
|
|======================= | 35%
|
|========================= | 39%
|
|=========================== | 42%
|
|============================= | 45%
|
|=============================== | 48%
|
|================================== | 52%
|
|==================================== | 55%
|
|====================================== | 58%
|
|======================================== | 61%
|
|========================================== | 65%
|
|============================================ | 68%
|
|============================================== | 71%
|
|================================================ | 74%
|
|================================================== | 77%
|
|==================================================== | 81%
|
|======================================================= | 84%
|
|========================================================= | 87%
|
|=========================================================== | 90%
|
|============================================================= | 94%
|
|=============================================================== | 97%
|
|=================================================================| 100%
[1] "Scaling data matrix"
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
bipolar.seurat.raw <- NULL
dim(bipolar.seurat@raw.data)
[1] 24904 44994
dim(bipolar.seurat@data)
[1] 14575 30673
head(bipolar.seurat@data.info)
nGene nUMI orig.ident
Bipolar1_CCCACAAGACTA 3468 8083 Bipolar1
Bipolar1_TCGCCTCGTAAG 4495 4541 Bipolar1
Bipolar1_CAAAGCATTTGC 1674 3131 Bipolar1
Bipolar1_CTTTTGATTGAC 1727 3321 Bipolar1
Bipolar1_GCTCCAATGACA 1845 3468 Bipolar1
Bipolar1_AAATACCCTCAT 1635 3004 Bipolar1
# Get gene names
mito.gene.names <- grep("^mt-", rownames(bipolar.seurat@data), value=TRUE)
# Get TSS normalized mitochodrial counts
# Cell total expression
col.total.counts <- Matrix::colSums(expm1(bipolar.seurat@data))
# Scale each count by the cell total
mito.percent.counts <- Matrix::colSums(expm1(bipolar.seurat@data[mito.gene.names, ]))/col.total.counts
# Add to seurat object as a metadata
bipolar.seurat <- AddMetaData(bipolar.seurat, mito.percent.counts, "percent.mitochodrial")
# Check
head(bipolar.seurat@data.info)
nGene nUMI orig.ident percent.mitochodrial
Bipolar1_CCCACAAGACTA 3468 8083 Bipolar1 0.025170490
Bipolar1_TCGCCTCGTAAG 4495 4541 Bipolar1 0.002911534
Bipolar1_CAAAGCATTTGC 1674 3131 Bipolar1 0.029411765
Bipolar1_CTTTTGATTGAC 1727 3321 Bipolar1 0.031975867
Bipolar1_GCTCCAATGACA 1845 3468 Bipolar1 0.020809249
Bipolar1_AAATACCCTCAT 1635 3004 Bipolar1 0.024333333
VlnPlot(bipolar.seurat, "nGene")
VlnPlot(bipolar.seurat, "nUMI")
VlnPlot(bipolar.seurat, "percent.mitochodrial")
VlnPlot(bipolar.seurat, c("nGene", "nUMI", "percent.mitochodrial"), nCol=3)
GenePlot(bipolar.seurat, "nUMI", "percent.mitochodrial")
# Filter libraries with more than 10% mitochondrially derived transcripts
# From 14575 x 30673 to 14575 x 27489
dim(bipolar.seurat@data)
[1] 14575 30673
# 1 Minute
# Filter max complexity
bipolar.seurat <- SubsetData(bipolar.seurat, subset.name="nGene", accept.high=3000)
# Filter percent percent mitochondrial reads
bipolar.seurat <- SubsetData(bipolar.seurat, subset.name="percent.mitochodrial", accept.high=0.1)
dim(bipolar.seurat@data)
[1] 14575 27489
table(bipolar.seurat@data.info[["orig.ident"]])
Bipolar1 Bipolar2 Bipolar3 Bipolar4 Bipolar5 Bipolar6
3053 3419 3661 3849 7127 6380
# Original standard matrix 8,971,430,520 bytes
object.size(bipolar.seurat)
3910195000 bytes
Saving the Seurat object.
# 3.16 GB
# save(bipolar.seurat, file="./data/bipolar_seurat.Robj")
# load("./data/bipolar_seurat.Robj")
You may need to export data to import into other applications.
# Log-scale expression matrix
# 5 minutes (1.2 GB)
# write.table(as.matrix(bipolar.seurat@data), file="bipolar_data.txt")
# Study metadata (1.2 MB)
# write.table(bipolar.seurat@data.info, file="bipolar_metadata.txt")
Plotting genes across groups.
VlnPlot(bipolar.seurat, "Prkca")
VlnPlot(bipolar.seurat, "Prkca", group.by="orig.ident")
# Plot a gene vs a gene
GenePlot(bipolar.seurat,"Isl1","Grm6",cex.use=1)
# Create batch affect (5 and 6 as a group)
batch.effect <- technical_batches[row.names(bipolar.seurat@data.info)]
batch.effect[batch.effect %in% c(1,2,3,4)] <- 1
batch.effect[batch.effect %in% c(5,6)] <- 2
# Add batch affect
bipolar.seurat <- AddMetaData(bipolar.seurat, batch.effect, "batch.effect")
# Regress on metadata
bipolar.seurat <- RegressOut(bipolar.seurat, latent.vars="batch.effect")
[1] "Regressing out batch.effect"
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
[1] "Scaling data matrix"
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
# 2 minutes
bipolar.seurat <- MeanVarPlot(bipolar.seurat,
fxn.x=expMean,
fxn.y=logVarDivMean,
x.low.cutoff=0.0125,
x.high.cutoff=3,
y.cutoff=0.5, do.contour=FALSE,
do.plot=FALSE)
[1] "Calculating gene dispersion"
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
length(bipolar.seurat@var.genes)
[1] 2285
# 9 minutes
bipolar.seurat <- PCA(bipolar.seurat,
pc.genes=bipolar.seurat@var.genes,
do.print=FALSE)
# save(bipolar.seurat, file="./data/bipolar_seurat_pca.Robj")
# Calculate PCA projection
bipolar.seurat <- ProjectPCA(bipolar.seurat)
|
| | 0%
|
|==== | 7%
|
|========= | 13%
|
|============= | 20%
|
|================= | 27%
|
|====================== | 33%
|
|========================== | 40%
|
|============================== | 47%
|
|=================================== | 53%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|================================================ | 73%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|============================================================= | 93%
|
|=================================================================| 100%
[1] "PC1"
[1] "Cplx3" "Snap25" "Gng13" "Syt1" "Trpm1"
[6] "Gnao1" "Mir124-2hg" "Calm1" "Lrtm1" "Pcp2"
[11] "Atp2b1" "Nme1" "Otx2" "Isl1" "Gnb3"
[16] "B3galt2" "Chgb" "Pcp4" "Neurod4" "Nap1l5"
[21] "Map4" "Nsf" "Cabp5" "Scg2" "Trnp1"
[26] "Aplp2" "Meg3" "Unc119" "Sncb" "mt-Cytb"
[1] ""
[1] "Dkk3" "Rlbp1" "Slc1a3" "Apoe" "Mfge8" "Cd9" "Car14"
[8] "Cp" "Ptn" "Sparc" "Spc25" "Aqp4" "Glul" "Crym"
[15] "Col9a1" "Prdx6" "Clu" "Hes1" "Timp3" "Pdpn" "Gpr37"
[22] "Fxyd1" "Egr1" "Jun" "Kdr" "Dbi" "Tsc22d4" "Gpm6b"
[29] "Espn" "Dapl1"
[1] ""
[1] ""
[1] "PC2"
[1] "Prkca" "Car8" "Ablim1" "Chgb" "Pcp2" "Calm1"
[7] "Vstm2b" "Trpm1" "Ccdc136" "Qpct" "Strip2" "Ift20"
[13] "Sebox" "Mt1" "Pcp4" "Tpbg" "Adrb1" "Zbtb20"
[19] "Adamts5" "Kcne2" "Isl1" "Rpa1" "Casp7" "Cacna2d3"
[25] "Tgfb2" "Gng13" "Lin7a" "Gpr179" "Mt2" "Cep112"
[1] ""
[1] "App" "Scgn" "Gria2" "Lbh"
[5] "Gucy1a3" "Samsn1" "Sncb" "Ptprz1"
[9] "Gsg1" "Fam19a3" "Tubb2a" "Snhg11"
[13] "Gngt1" "Cadm3" "Gngt2" "Slc24a3"
[17] "A530058N18Rik" "Pcdh9" "Rpgrip1" "A730046J19Rik"
[21] "Zeb2" "Slitrk6" "Cxxc5" "Fezf2"
[25] "Drd1" "Cplx4" "Zfp804b" "Trib2"
[29] "Otor" "Lhx4"
[1] ""
[1] ""
[1] "PC3"
[1] "Cdh9" "Neurod2" "Lmo4" "BC046251"
[5] "Nfia" "Medag" "Hs3st4" "Hpca"
[9] "Cplx4" "Gas6" "Meis2" "Sulf2"
[13] "Atp2b1" "St18" "Vipr2" "Gm10605"
[17] "Kcng4" "Gnb3" "Epha7" "Gng13"
[21] "Ptprt" "1810041L15Rik" "Gucy1a3" "Gm4792"
[25] "Cxcl14" "Adcy2" "Grm6" "RP23-363M4.1"
[29] "Isl1" "Sox6"
[1] ""
[1] "Slit2" "Rcvrn" "Tacr3" "A730046J19Rik"
[5] "Zfhx4" "Rpgrip1" "A930003A15Rik" "Pdc"
[9] "Glra1" "Cdh8" "Gnat1" "Fezf2"
[13] "Rp1" "Nrl" "Rs1" "Esam"
[17] "Rho" "Gnb1" "Sag" "Pde6b"
[21] "Pde1a" "Slc24a1" "Nxph1" "Guca1b"
[25] "Slitrk6" "Aipl1" "Nr2e3" "Neto1"
[29] "Guk1" "Phyhipl"
[1] ""
[1] ""
[1] "PC4"
[1] "Tacr3" "Slit2" "Zfhx4" "A730046J19Rik"
[5] "Glra1" "Nxph1" "Cdh8" "Gabra1"
[9] "Neto1" "Cabp1" "Lhx3" "Pde1a"
[13] "Wls" "Ncam2" "Fezf2" "Meg3"
[17] "Lamp5" "Scg2" "Gabrb1" "Esam"
[21] "Scgn" "Tmeff2" "Slitrk6" "Camk4"
[25] "Fam19a3" "Phyhipl" "A330008L17Rik" "Casp3"
[29] "Guk1" "Pcdh10"
[1] ""
[1] "Pdc" "Gnat1" "Rp1" "Rs1" "Sag" "Rho"
[7] "Pde6b" "Nr2e3" "Slc24a1" "Guca1b" "Nrl" "Pde6g"
[13] "Cnga1" "Guca1a" "Rcvrn" "Rtbdn" "Pde6a" "Nxnl1"
[19] "Aipl1" "Prph2" "Rdh12" "Tulp1" "Gngt1" "Samd11"
[25] "AI847159" "Reep6" "Gnb1" "Pex5l" "Rgs9" "Vtn"
[1] ""
[1] ""
[1] "PC5"
[1] "Gsg1" "Pcp4" "Otor" "Nnat"
[5] "Grik1" "Fezf2" "Fam19a3" "A930003A15Rik"
[9] "Phyhipl" "Lhx4" "Cabp5" "Dner"
[13] "Kcnip4" "Lrrtm3" "Nfib" "Cacna2d2"
[17] "A930009A15Rik" "Cacng2" "Prkar2b" "Sphkap"
[21] "Gngt1" "Cntn4" "Rpgrip1" "Gng2"
[25] "Cacna2d1" "Nxph3" "Cnnm1" "Lbh"
[29] "Slc16a7" "Slc24a3"
[1] ""
[1] "Vsx1" "Pcp4l1" "Cck" "Cdh8"
[5] "Reln" "Tnnt1" "Lect1" "Gjd2"
[9] "Zfhx4" "Scg2" "Pcdh9" "Lhx3"
[13] "A530058N18Rik" "Spock3" "A330008L17Rik" "Sec1"
[17] "Six3" "Kcnab1" "Chrna6" "Igfn1"
[21] "Camkv" "Guk1" "Gng13" "Syt2"
[25] "Adra2c" "Mkx" "Smoc2" "Igf1"
[29] "Gria2" "Fn1"
[1] ""
[1] ""
# Can plot top genes for top components
PrintPCA(bipolar.seurat,
pcs.print=1:2,
genes.print=5,
use.full=TRUE)
[1] "PC1"
[1] "Cplx3" "Snap25" "Gng13" "Syt1" "Trpm1"
[1] ""
[1] "Dkk3" "Rlbp1" "Slc1a3" "Apoe" "Mfge8"
[1] ""
[1] ""
[1] "PC2"
[1] "Prkca" "Car8" "Ablim1" "Chgb" "Pcp2"
[1] ""
[1] "App" "Scgn" "Gria2" "Lbh" "Gucy1a3"
[1] ""
[1] ""
VizPCA(bipolar.seurat, pcs.use=1:2)
How can we use component loadings?
PCAPlot(bipolar.seurat, 1, 2)
PCHeatmap(bipolar.seurat, pc.use=1, cells.use=100, do.balanced=TRUE)
# Time Intensive
# Jackstraw
# bipolar.seurat <- JackStraw(bipolar.seurat, num.replicate = 100, do.print = FALSE)
How do we choose how many components to use?
# Time Efficient but more ad hoc
# Scree (elbow) plot
# 37 selected
PCElbowPlot(bipolar.seurat, num.pc=40)
# Calculate t-SNE Ordination
# 7 minutes
bipolar.seurat <- RunTSNE(bipolar.seurat,
dims.use=1:37,
do.fast=TRUE)
# Plot
TSNEPlot(bipolar.seurat)
This is not PCA
PCA
t-SNE
# 14 minutes
# bipolar.seurat <- FindClusters(bipolar.seurat,
# pc.use=1:37,
# resolution=0.6,
# print.output=0,
# save.SNN=TRUE)
# save(bipolar.seurat, file="./data/bipolar_cluster_seurat.Robj")
load("./data/bipolar_cluster_seurat.Robj")
TSNEPlot(bipolar.seurat)
TSNEPlot(bipolar.seurat, do.label=TRUE)
Now that we have subclusters of cell populations, plotting genes through subclusters is identical to before.
VlnPlot(bipolar.seurat, "Prkca")
VlnPlot(bipolar.seurat, "Glul")
You can also gene expression through out the cell ordination.
FeaturePlot(bipolar.seurat,
c("Prkca","Glul", "Scgn", "Grm6"),
cols.use = c("grey","blue"))
FeaturePlot(bipolar.seurat,
"nGene",
cols.use = c("grey","blue"))
FeaturePlot(bipolar.seurat,
"percent.mitochodrial",
cols.use = c("grey","blue"))
Check for your batch affect.
# Making Fake Data
fake.sites <- as.integer(bipolar.seurat@ident %in% c(0))
names(fake.sites) <- names(bipolar.seurat@ident)
# Add metadata
bipolar.seurat <- AddMetaData(bipolar.seurat, fake.sites, "site")
# Plot feature
FeaturePlot(bipolar.seurat, c("site"), cols.use = c("green","orange"))
cell.labels <- bipolar.seurat@ident
corner(cell.labels)
Bipolar1_CAAAGCATTTGC Bipolar1_CTTTTGATTGAC Bipolar1_GCTCCAATGACA
5 4 0
Bipolar1_AAATACCCTCAT Bipolar1_TGCATGCGTCCA
3 5
Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
# 2 minutes
cluster5.markers <- FindMarkers(bipolar.seurat, ident.1=5, min.pct = 0.25)
head(cluster5.markers, 30)
p_val avg_diff pct.1 pct.2
Cck 0 2.742669 0.692 0.008
Pcp4 0 -2.173863 0.170 0.653
Pcp2 0 -2.165046 0.294 0.742
Glul 0 -2.137603 0.161 0.310
Lect1 0 2.068691 0.547 0.032
Acsl3 0 -1.827424 0.160 0.417
Mt1 0 -1.754321 0.093 0.511
Tnnt1 0 1.749033 0.604 0.085
Prkca 0 -1.711193 0.053 0.383
Vsx1 0 1.686164 0.713 0.108
Gngt2 0 1.630449 0.694 0.124
Pcp4l1 0 1.600984 0.701 0.155
Cabp5 0 -1.586463 0.197 0.614
Scgn 0 1.578928 0.914 0.275
Scg2 0 1.571709 0.982 0.549
Unc13c 0 1.537642 0.550 0.105
Ablim1 0 -1.453118 0.085 0.405
Cdh8 0 1.450262 0.494 0.058
Spock3 0 1.397758 0.328 0.017
Gjd2 0 1.391591 0.526 0.103
Guk1 0 1.375674 0.850 0.349
Mt2 0 -1.270403 0.055 0.320
Ccdc136 0 -1.270049 0.086 0.409
Cntn4 0 -1.252449 0.028 0.278
Six3 0 1.216624 0.546 0.143
Car8 0 -1.175168 0.170 0.420
Zfhx4 0 1.171140 0.482 0.103
A530058N18Rik 0 1.159338 0.504 0.130
Ptprz1 0 1.152080 0.581 0.173
Lhx3 0 1.106063 0.299 0.029
# bipolar.markers <- FindAllMarkers(bipolar.seurat, only.pos=TRUE, min.pct=0.25, thresh.use=0.25)
# bipolar.markers %>% group_by(cluster) %>% top_n(2, avg_diff)
bimod: Tests differences in mean and proportions.
roc: Uses AUC like definition of separation.
t: Student's T-test.
tobit: Tobit regression on a smoothed data.
plot.genes <- head(rownames(cluster5.markers),30)
DoHeatmap(bipolar.seurat,
genes.use=plot.genes,
order.by.ident=TRUE,
slim.col.label=TRUE,
remove.key=TRUE)
dot.plot(bipolar.seurat,
features.use=plot.genes)
# Genes in tutorial
plot.genes = c("Vsx2","Otx2","Scgn","Isl1","Grm6",
"Apoe","Pax6","Rho","Arr3","Tacr3",
"Syt2","Neto1","Irx6","Prkar2b",
"Grik1","Kcng4","Cabp5","Vsx1","Prkca")
# Cell groups shown
only.use.groups = c(7,9,10,12,8,14,3,13,6,11,5,4,16,0)
# Plot
dot.plot(bipolar.seurat,
features.use=plot.genes,
subset.group=only.use.groups)
# Make a covariate to inform the test
test.5 <- as.character(bipolar.seurat@ident)
test.5[test.5 != "5"] <- "wild"
test.5[test.5 == "5"] <- "test"
names(test.5) <- names(bipolar.seurat@ident)
test.5 <- factor(test.5, levels=c("wild","test"))
# Load into Mast Object
# Mast object
# Data frame of cell-level covariates
# Data frame of feature-level covariates
bipolar.mast <- FromMatrix(as.matrix(bipolar.seurat@data),
cbind(bipolar.seurat@data.info,test.5),
data.frame(primerid=rownames(bipolar.seurat@data)))
# Run test
# 1 hr
# zlm.test.5 <- zlm.SingleCellAssay(~ test.5, bipolar.mast)
load("./data/mast_zlm.Robj")
df.test.5 <- data.frame(summary(zlm.test.5, doLRT="test.5test"))
summary.mast <- makeNice(df.test.5, val="test.5test")
summary.mast[abs(summary.mast[,5])>=1 & !is.na(summary.mast[,5]),]
Gene pval padj padj_strict logFC_test.5test
Cck Cck 0.000000e+00 0.000000e+00 0.000000e+00 2.037136
Cdh8 Cdh8 0.000000e+00 0.000000e+00 0.000000e+00 1.115389
Gjd2 Gjd2 0.000000e+00 0.000000e+00 0.000000e+00 1.105904
Gngt2 Gngt2 0.000000e+00 0.000000e+00 0.000000e+00 1.650368
Guk1 Guk1 0.000000e+00 0.000000e+00 0.000000e+00 1.721112
Lect1 Lect1 0.000000e+00 0.000000e+00 0.000000e+00 1.415152
Pcp2 Pcp2 0.000000e+00 0.000000e+00 0.000000e+00 -2.313532
Pcp4 Pcp4 0.000000e+00 0.000000e+00 0.000000e+00 -1.928654
Pcp4l1 Pcp4l1 0.000000e+00 0.000000e+00 0.000000e+00 1.598591
Ptprz1 Ptprz1 0.000000e+00 0.000000e+00 0.000000e+00 1.103493
Scg2 Scg2 0.000000e+00 0.000000e+00 0.000000e+00 2.396354
Scgn Scgn 0.000000e+00 0.000000e+00 0.000000e+00 2.306762
Six3 Six3 0.000000e+00 0.000000e+00 0.000000e+00 1.053435
Tnnt1 Tnnt1 0.000000e+00 0.000000e+00 0.000000e+00 1.450846
Unc13c Unc13c 0.000000e+00 0.000000e+00 0.000000e+00 1.231391
Vsx1 Vsx1 0.000000e+00 0.000000e+00 0.000000e+00 1.851497
Cabp5 Cabp5 2.231731e-257 1.282129e-254 1.296080e-253 -1.417557
Isl1 Isl1 4.754931e-192 2.114870e-189 2.137881e-188 1.313532
Mt1 Mt1 2.249854e-191 9.694058e-189 9.799535e-188 -1.228030
Gabra1 Gabra1 7.918514e-190 3.211190e-187 3.246130e-186 1.083016
plot.genes = c("Scg2","Scgn","Cck","Vsx1","Guk1",
"Gngt2","Pcp4l1","Pcp4","Pcp2")
dot.plot(bipolar.seurat,
features.use=plot.genes)